##Assignment 2

##Read Data from file and load libraries

library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Load the SENIC data set from the working directory. No header or column
# names are passed, and the rest of the script refers to the columns by
# read.table()'s default names (V1, V2, ...).
df <- read.table(file = "SENIC.txt")

#Function returning the indices of the outliers in a column (1.5 * IQR rule)

# Find the positions of the outliers in one column using the 1.5 * IQR rule.
#
# Arguments:
#   x    - data frame (or other `[[`-indexable object) holding the column.
#   name - name of the column to inspect, as a string.
#
# Returns an integer vector of positions within x[[name]] whose value lies on
# or beyond Q3 + 1.5 * IQR or Q1 - 1.5 * IQR. Missing values are ignored when
# the quartiles are computed and are never reported as outliers.
#
# Stops with an error when the column does not exist, because x[[name]] would
# otherwise be NULL and the function would silently report "no outliers".
my_func <- function(x, name) {
  col <- x[[name]]
  if (is.null(col)) {
    stop("Column '", name, "' not found in 'x'", call. = FALSE)
  }

  # names = FALSE keeps the "25%"/"75%" labels from leaking into the result.
  quartiles <- quantile(col, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  spread <- quartiles[2] - quartiles[1]
  upper_fence <- quartiles[2] + 1.5 * spread
  lower_fence <- quartiles[1] - 1.5 * spread

  # which() drops the NA comparisons produced by missing values.
  which(col >= upper_fence | col <= lower_fence)
}

##Density Plotting of Infection Risk

# Density + histogram of infection risk, with the 1.5 * IQR outliers marked
# as diamonds on the x axis.
#
# Infection risk is column V4, not V3: the variable-naming step further down
# this script drops columns 1, 8 and 9 and labels the second remaining column
# (V3) "Age" and the third (V4) "InfectionRisk".
indices_infection <- my_func(df, "V4")
outliers_infection <- df$V4[indices_infection]
outliers <- tibble(x = outliers_infection, y = 0)

g <- ggplot(df, aes(x = V4)) +
  geom_histogram(aes(y = ..density..), bins = 50, alpha = 0.7,
                 fill = "#6666FF") +
  geom_density(col = "#330066")
g <- g +
  geom_point(data = outliers, aes(x, y), size = 2, colour = "#00CCFF",
             pch = 23, fill = "#00CCFF") +
  ggtitle("Density & Histogram Plot of Infection Risk") +
  labs(x = "Infection Risk", y = "Density")
g <- g + theme(plot.title = element_text(color = "#666666", size = 21, hjust = 0))
# Anchor the note to the top-right corner of the panel instead of hard-coded
# data coordinates, so it does not stretch the axes whatever the data range.
g + annotate(geom = "text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
             label = "Diamond points \n represent outliers")

##Produce graphs for all other variables

# Work on a copy without columns 1, 8 and 9 (presumably the identifier and the
# two categorical variables - confirm against the data description) and give
# the remaining nine columns readable names.
dt <- df[-c(1, 8, 9)]
names(dt) <- c(
  "LengthofStay", "Age", "InfectionRisk", "RoutineCulturingRatio",
  "RoutineChestXrayRatio", "NumberofBeds", "AverageDailyCensus",
  "NumberofNurses", "AvailableFacilitiesServices"
)

# One density/histogram plot per variable, keyed by variable name.
myplots <- setNames(vector("list", ncol(dt)), names(dt))

for (name in names(dt)) {
  # Outliers of the current variable, drawn as diamonds on the x axis (y = 0).
  indices <- my_func(dt, name)
  outliers <- dt[[name]][indices]
  outliers_names <- tibble(x = outliers, y = 0)

  density_layer <- geom_density(col = "#330066")
  histogram_layer <- geom_histogram(
    aes(y = ..density..),
    bins = 50, alpha = 0.7, fill = "#6666FF"
  )
  outlier_layer <- geom_point(
    data = outliers_names, aes(x, y),
    size = 2, color = "#00CCFF", pch = 23, fill = "#00CCFF"
  )

  # aes_string() resolves the column name immediately, so each stored plot
  # keeps its own variable even though `name` changes on every iteration.
  myplots[[name]] <- ggplot(dt, aes_string(x = name)) +
    density_layer +
    histogram_layer +
    outlier_layer
}

# Lay all plots out on a single page.
grid.arrange(grobs = myplots)

##Scatter Plot

# Number of nurses against infection risk, coloured by number of beds.
# Column choice follows the variable names this script assigns after dropping
# columns 1, 8 and 9: V4 = InfectionRisk, V7 = NumberofBeds,
# V11 = NumberofNurses (V3, V6 and V10 are Age, RoutineChestXrayRatio and
# AverageDailyCensus).
ggplot(df, aes(y = V11, x = V4, col = V7)) +
  geom_point(size = 5) +
  ggtitle("Scatter Plot of Infection Risk & Number of Nurses colored by Number of Beds") +
  labs(x = "Infection Risk", y = "Number of Nurses", colour = "Number of Beds")

##Plotly Graph

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive version of the infection-risk density/histogram plot.
# Infection risk is column V4: the variable names this script assigns after
# dropping columns 1, 8 and 9 label V3 "Age" and V4 "InfectionRisk".
indices_infection <- my_func(df, "V4")
outliers_infection <- df$V4[indices_infection]
outliers <- tibble(x = outliers_infection, y = 0)

# alpha is a fixed setting, so it sits outside aes() (inside aes() it becomes
# a mapped aesthetic with its own legend entry). bins is set explicitly to the
# stat_bin() default of 30 so the plot is unchanged and no message is emitted.
plot <- ggplot(df, aes(V4)) +
  geom_histogram(aes(y = ..density..), bins = 30, alpha = 0.7,
                 col = "coral", fill = "green") +
  geom_density(col = "blue") +
  ggtitle("Density with Histogram overlay") +
  geom_point(data = outliers, aes(x, y), size = 3)
p <- ggplotly(plot)
p

##Plotly Plot Made with Pipe operator

# Plotly histogram of infection risk built with the pipe, with the outliers
# overlaid as markers at y = 0.
# Infection risk is column V4: the variable names this script assigns after
# dropping columns 1, 8 and 9 label V3 "Age" and V4 "InfectionRisk".
Outlier_indices <- my_func(df, "V4")
Outlier_values <- df$V4[Outlier_indices]
yValue <- rep(0, length(Outlier_values))

hisPlot <- df %>%
  select(V4) %>%
  plot_ly(x = ~V4, type = "histogram") %>%
  add_markers(x = ~Outlier_values, y = ~yValue)

hisPlot

##Shiny App